sampleDists <- dist(t(assay(rld)))
plot(hclust(sampleDists))annot = dplyr::select(experimental_metadata, condition)
row.names(annot) = experimental_metadata$sample_id
rld %>%
assay() %>%
cor() %>%
pheatmap(color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(100),
annotation = annot,
annotation_colors = list(condition = c(Control = "#BF3100", OKSM = "#E9B44C",
Senolytic = "#1B998B", Senolytic_OKSM = "#5D576B")),
cluster_rows = TRUE,
cluster_cols = T,
cellwidth = 13,
cellheight = 13)ntop = 500
rv <- rowVars(assay(rld))
select <- order(rv, decreasing = TRUE)[seq_len(min(ntop, length(rv)))]
pca = prcomp(t(assay(rld)[select,]))
percentVar <- pca$sdev^2/sum(pca$sdev^2)
pca_data <- plotPCA(rld, intgroup = c("condition", "replicate"), returnData=TRUE)
percentVar <- round(100 * attr(pca_data, "percentVar"), digits=2)
ggplot(pca_data, aes(PC1, PC2, color=condition, shape=replicate)) + geom_point(size=3) +
scale_x_continuous(paste0("PC1: ",percentVar[1],"% variance")) +
scale_y_continuous(paste0("PC2: ",percentVar[2],"% variance")) +
coord_fixed() + theme_classic() + geom_text(data = pca_data, aes(PC1,PC2, label = name), hjust = 1.2)effective_lengths = matrix(0, ncol=length(experimental_metadata$sample_id), nrow=17714)
colnames(effective_lengths)= experimental_metadata$sample_id
for( i in experimental_metadata$sample_id){
effective_lengths[,i] = read.table(paste("data/isc/", i, ".genes.results",sep=""), sep="\t", header=TRUE)$effective_length
}
row.names(effective_lengths) = read.table(paste("data/isc/", i, ".genes.results",sep=""), sep="\t", header=TRUE)$gene_id
effective_lengths = rowMeans(effective_lengths[row.names(counts(dds)),])
ncrpk = counts(dds) / (effective_lengths / 1000)
ncrpk = apply(ncrpk, c(1,2), function(x){if(is.nan(x)){0}else{x}})
ncrpk = apply(ncrpk, c(1,2), function(x){if(is.infinite(x)){0}else{x}})
ncscalingfactor = colSums(ncrpk) / 1e6
nctpm = sweep(ncrpk, 2, ncscalingfactor, "/")
nctpm.melt = melt(nctpm)
ggplot(nctpm.melt, aes(x=Var2, y=value)) + geom_boxplot() + theme_classic() + theme(axis.text.x = element_text(angle = 90, colour="black", hjust = 1)) + scale_x_discrete("Sample") + scale_y_continuous("TPM")tpm.threshold = 20000
test.tpm = apply(nctpm, 1, function(x){ any(x> tpm.threshold) })
ensembl.genes[names(test.tpm[test.tpm])] %>%
as.data.frame() %>%
datatable(options = list(scrollX = TRUE))generate_de_section(dds_wald, "OKSM", "Control")## [1] "Number of significant genes (padj < 0.1 & log2FoldChange < log2(1.5)): 1258"
generate_de_section(dds_wald, "Senolytic", "Control")## [1] "Number of significant genes (padj < 0.1 & log2FoldChange < log2(1.5)): 1399"
generate_de_section(dds_wald, "Senolytic_OKSM", "Control")## [1] "Number of significant genes (padj < 0.1 & log2FoldChange < log2(1.5)): 1311"
generate_de_section(dds_wald, "OKSM", "Senolytic")## [1] "Number of significant genes (padj < 0.1 & log2FoldChange < log2(1.5)): 124"
generate_de_section(dds_wald, "OKSM", "Senolytic_OKSM")## [1] "Number of significant genes (padj < 0.1 & log2FoldChange < log2(1.5)): 109"
generate_de_section(dds_wald, "Senolytic", "Senolytic_OKSM")## [1] "Number of significant genes (padj < 0.1 & log2FoldChange < log2(1.5)): 0"
dds_LRT = nbinomLRT(dds, reduced = ~1)
res = results(dds_LRT)
res$gene_biotype= ensembl.genes$gene_biotype[match(row.names(res), ensembl.genes$gene_id)]
res$external_gene_name= ensembl.genes$external_gene_name[match(row.names(res), ensembl.genes$gene_id)]
hist(res$pvalue) Number of significant genes (padj < 0.1):
sum(res$padj < 0.1, na.rm=T)## [1] 2967
# assay(x) to access the count data
sig_mat_rld = assay(significant_rld)
# The apply function swaps the rows to samples and columns to genes -- the standard is the other way around: samples in cols and genes in rows, hence the transpose function
zscores = t(apply(sig_mat_rld, 1, function(x){ (x - mean(x)) / sd(x) }))foo = as(zscores, "matrix")
bar = sapply(1:10, function(x){kmeans(foo, centers=x)$tot.withinss})
plot(bar, type="l")pam_clust <- generate_data(zscores, 7, "pam")
saveRDS(pam_clust, "output/isc/pam_clust.rds")
# pam_clust <- as.data.frame(pam_clust)
# pam_clust$Cluster <- factor(pam_clust$Cluster, levels = c(5,4,3,1,2,6))
# pam_clust <- pam_clust[order(pam_clust$Cluster),]
pheatmap(pam_clust[,1:(ncol(pam_clust)-1)],
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdBu")))(100),
fontsize_row = 5.5,
annotation_col = annotation,
annotation_colors = anno_colours,
cluster_rows = FALSE,
cluster_cols = FALSE)pam_clust_df <- pam_clust %>%
as.data.frame() %>%
mutate(gene_name = ensembl.genes[rownames(.),]$external_gene_name) %>%
dplyr::select(gene_name, Cluster) %>%
dplyr::rename("Gene Name" = gene_name)
datatable(pam_clust_df, options = list(scrollX = TRUE), class = "white-space: nowrap")c1 <- pam_clust_df[pam_clust_df$Cluster == 1, ] %>%
dplyr::select(-Cluster)
c2 <- pam_clust_df[pam_clust_df$Cluster == 2, ] %>%
dplyr::select(-Cluster)
c3 <- pam_clust_df[pam_clust_df$Cluster == 3, ] %>%
dplyr::select(-Cluster)
c4 <- pam_clust_df[pam_clust_df$Cluster == 4, ] %>%
dplyr::select(-Cluster)
c5 <- pam_clust_df[pam_clust_df$Cluster == 5, ] %>%
dplyr::select(-Cluster)
c6 <- pam_clust_df[pam_clust_df$Cluster == 6, ] %>%
dplyr::select(-Cluster)
c7 <- pam_clust_df[pam_clust_df$Cluster == 7, ] %>%
dplyr::select(-Cluster)
data.frame(Cluster = c("Cluster 1", "Cluster 2", "Cluster 3", "Cluster 4",
"Cluster 5", "Cluster 6", "Cluster 7", "Total"),
Number_of_genes = c(nrow(c1), nrow(c2), nrow(c3), nrow(c4),
nrow(c5), nrow(c6), nrow(c7),
sum(c(nrow(c1),nrow(c2),nrow(c3),nrow(c4),
nrow(c5),nrow(c6),nrow(c7)))))## Cluster Number_of_genes
## 1 Cluster 1 773
## 2 Cluster 2 397
## 3 Cluster 3 128
## 4 Cluster 4 566
## 5 Cluster 5 613
## 6 Cluster 6 248
## 7 Cluster 7 242
## 8 Total 2967
## Checking the stability of clusters
#fviz_nbclust(zscores, kmeans, method = "silhouette")
set.seed(2)
dd = as.dist((1 - cor(t(zscores)))/2)
pam = pam(dd, 7, diss = TRUE)
sil = silhouette(pam$clustering, dd)
plot(sil, border=NA, main = "Silhouette Plot for 7 Clusters")#listEnrichrSites()
setEnrichrSite("FlyEnrichr")
dbs <- listEnrichrDbs()
to_check <- c("GO_Biological_Process_2018", "KEGG_2019")
output_dir <- "output/isc/"eresList$GO_Biological_Process_2018 %>%
plot_enrichr("GO_Biological_Process_2018")datatable(eresList[[1]], options = list(scrollX = TRUE), class = "white-space: nowrap")get_important_terms_rds(eresList$GO_Biological_Process_2018, output_dir, "isc_c1_go")
c1_go <- get_important_terms(eresList$GO_Biological_Process_2018)eresList$KEGG_2019 %>%
plot_enrichr("KEGG_2019") datatable(eresList[[2]], options = list(scrollX = TRUE), class = "white-space: nowrap")get_important_terms_rds(eresList$KEGG_2019, output_dir, "isc_c1_kegg")
c1_kegg <- get_important_terms(eresList$KEGG_2019)eresList$GO_Biological_Process_2018 %>%
plot_enrichr("GO_Biological_Process_2018")datatable(eresList[[1]], options = list(scrollX = TRUE), class = "white-space: nowrap")get_important_terms_rds(eresList$GO_Biological_Process_2018, output_dir, "isc_c2_go")
c2_go <- get_important_terms(eresList$GO_Biological_Process_2018)eresList$KEGG_2019 %>%
plot_enrichr("KEGG_2019") datatable(eresList[[2]], options = list(scrollX = TRUE), class = "white-space: nowrap")get_important_terms_rds(eresList$KEGG_2019, output_dir, "isc_c2_kegg")
c2_kegg <- get_important_terms(eresList$KEGG_2019)eresList$GO_Biological_Process_2018 %>%
plot_enrichr("GO_Biological_Process_2018")datatable(eresList[[1]], options = list(scrollX = TRUE), class = "white-space: nowrap")get_important_terms_rds(eresList$GO_Biological_Process_2018, output_dir, "isc_c3_go")
c3_go <- get_important_terms(eresList$GO_Biological_Process_2018)eresList$KEGG_2019 %>%
plot_enrichr("KEGG_2019") datatable(eresList[[2]], options = list(scrollX = TRUE), class = "white-space: nowrap")get_important_terms_rds(eresList$KEGG_2019, output_dir, "isc_c3_kegg")
c3_kegg <- get_important_terms(eresList$KEGG_2019)eresList$GO_Biological_Process_2018 %>%
plot_enrichr("GO_Biological_Process_2018")datatable(eresList[[1]], options = list(scrollX = TRUE), class = "white-space: nowrap")get_important_terms_rds(eresList$GO_Biological_Process_2018, output_dir, "isc_c4_go")
c4_go <- get_important_terms(eresList$GO_Biological_Process_2018)eresList$KEGG_2019 %>%
plot_enrichr("KEGG_2019") datatable(eresList[[2]], options = list(scrollX = TRUE), class = "white-space: nowrap")get_important_terms_rds(eresList$KEGG_2019, output_dir, "isc_c4_kegg")
c4_kegg <- get_important_terms(eresList$KEGG_2019)eresList$GO_Biological_Process_2018 %>%
plot_enrichr("GO_Biological_Process_2018")datatable(eresList[[1]], options = list(scrollX = TRUE), class = "white-space: nowrap")get_important_terms_rds(eresList$GO_Biological_Process_2018, output_dir, "isc_c5_go")
c5_go <- get_important_terms(eresList$GO_Biological_Process_2018)eresList$KEGG_2019 %>%
plot_enrichr("KEGG_2019") datatable(eresList[[2]], options = list(scrollX = TRUE), class = "white-space: nowrap")get_important_terms_rds(eresList$KEGG_2019, output_dir, "isc_c5_kegg")
c5_kegg <- get_important_terms(eresList$KEGG_2019)eresList$GO_Biological_Process_2018 %>%
plot_enrichr("GO_Biological_Process_2018")datatable(eresList[[1]], options = list(scrollX = TRUE), class = "white-space: nowrap")get_important_terms_rds(eresList$GO_Biological_Process_2018, output_dir, "isc_c6_go")
c6_go <- get_important_terms(eresList$GO_Biological_Process_2018)eresList$KEGG_2019 %>%
plot_enrichr("KEGG_2019") datatable(eresList[[2]], options = list(scrollX = TRUE), class = "white-space: nowrap")get_important_terms_rds(eresList$KEGG_2019, output_dir, "isc_c6_kegg")
c6_kegg <- get_important_terms(eresList$KEGG_2019)eresList$GO_Biological_Process_2018 %>%
plot_enrichr("GO_Biological_Process_2018")datatable(eresList[[1]], options = list(scrollX = TRUE), class = "white-space: nowrap")get_important_terms_rds(eresList$GO_Biological_Process_2018, output_dir, "isc_c7_go")
c7_go <- get_important_terms(eresList$GO_Biological_Process_2018)eresList$KEGG_2019 %>%
plot_enrichr("KEGG_2019") datatable(eresList[[2]], options = list(scrollX = TRUE), class = "white-space: nowrap")get_important_terms_rds(eresList$KEGG_2019, output_dir, "isc_c7_kegg")
c7_kegg <- get_important_terms(eresList$KEGG_2019)all_go = list(c1 = c1_go, c2=c2_go, c3=c3_go, c4=c4_go, c5=c5_go, c6=c6_go, c7=c7_go)
all_go <- all_go %>%
bind_rows(.id = "Cluster") %>%
group_by(Cluster) %>%
slice_min(order_by = Adjusted.P.value, n = 10)all_go %>%
mutate(Term = gsub("\\([^()]*\\)", "", Term),
Term = str_to_title(Term)) %>%
mutate(Cluster = factor(Cluster, levels = c(unique(all_go$Cluster)))) %>%
group_by(Cluster) %>%
mutate(Term = factor(Term, levels = Term)) %>%
ggplot(aes(x = Cluster, y = Term)) +
geom_point(aes(colour = Adjusted.P.value, size = Ratio)) +
ylab(NULL) +
xlab("Cluster") +
scale_color_continuous(low="red", high="blue", guide=guide_colorbar(reverse=FALSE)) +
scale_y_discrete(labels = function(x) str_wrap(x, width = 60),
limits=rev) +
#labs(fill = "Adjusted p-value") +
guides(colour = guide_colorbar(title = "Adjusted p-value", reverse = TRUE),
size = guide_legend(title = "Gene Ratio")) +
theme(axis.text.x = element_text(size=8)) +
theme_bw() +
ggtitle("GO Enrichment Analysis")all_kegg = list(c1 = c1_kegg, c2=c2_kegg, c3=c3_kegg, c4=c4_kegg, c5=c5_kegg, c6=c6_kegg, c7=c7_kegg)
all_kegg <- all_kegg %>%
bind_rows(.id = "Cluster") %>%
group_by(Cluster) %>%
slice_min(order_by = Adjusted.P.value, n = 10)all_kegg %>%
mutate(Term = gsub("\\([^()]*\\)", "", Term),
Term = str_to_title(Term)) %>%
mutate(Cluster = factor(Cluster, levels = c(unique(all_kegg$Cluster)))) %>%
group_by(Cluster) %>%
mutate(Term = factor(Term, levels = Term)) %>%
ggplot(aes(x = Cluster, y = Term)) +
geom_point(aes(colour = Adjusted.P.value, size = Ratio)) +
ylab(NULL) +
xlab("Cluster") +
scale_color_continuous(low="red", high="blue", guide=guide_colorbar(reverse=FALSE)) +
scale_y_discrete(labels = function(x) str_wrap(x, width = 60),
limits=rev) +
#labs(fill = "Adjusted p-value") +
guides(colour = guide_colorbar(title = "Adjusted p-value", reverse = TRUE),
size = guide_legend(title = "Gene Ratio")) +
theme(axis.text.x = element_text(size=8)) +
theme_bw() +
ggtitle("GO Enrichment Analysis")sessionInfo()## R version 4.0.5 (2021-03-31)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats4 parallel stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] enrichR_3.0 DT_0.17
## [3] RColorBrewer_1.1-2 scales_1.1.1
## [5] reshape2_1.4.4 knitr_1.33
## [7] biomaRt_2.46.3 GenomicFeatures_1.42.3
## [9] AnnotationDbi_1.52.0 genefilter_1.72.1
## [11] DESeq2_1.30.1 SummarizedExperiment_1.20.0
## [13] Biobase_2.50.0 MatrixGenerics_1.2.1
## [15] matrixStats_0.58.0 BSgenome.Dmelanogaster.UCSC.dm6_1.4.1
## [17] BSgenome_1.58.0 rtracklayer_1.50.0
## [19] GenomicRanges_1.42.0 GenomeInfoDb_1.26.7
## [21] Biostrings_2.58.0 XVector_0.30.0
## [23] IRanges_2.24.1 S4Vectors_0.28.1
## [25] BiocGenerics_0.36.0 forcats_0.5.1
## [27] stringr_1.4.0 dplyr_1.0.5
## [29] purrr_0.3.4 readr_1.4.0
## [31] tidyr_1.1.3 tibble_3.1.0
## [33] tidyverse_1.3.0 EnhancedVolcano_1.8.0
## [35] ggrepel_0.9.1 ggplot2_3.3.3
## [37] pheatmap_1.0.12 cluster_2.1.1
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.2.1 BiocFileCache_1.14.0
## [4] plyr_1.8.6 splines_4.0.5 crosstalk_1.1.1
## [7] BiocParallel_1.24.1 digest_0.6.27 htmltools_0.5.1.1
## [10] fansi_0.4.2 magrittr_2.0.1 memoise_2.0.0
## [13] openxlsx_4.2.3 annotate_1.68.0 modelr_0.1.8
## [16] extrafont_0.17 extrafontdb_1.0 askpass_1.1
## [19] prettyunits_1.1.1 colorspace_2.0-0 blob_1.2.1
## [22] rvest_1.0.0 rappdirs_0.3.3 haven_2.3.1
## [25] xfun_0.22 crayon_1.4.1 RCurl_1.98-1.3
## [28] jsonlite_1.7.2 survival_3.2-10 glue_1.4.2
## [31] gtable_0.3.0 zlibbioc_1.36.0 DelayedArray_0.16.3
## [34] proj4_1.0-10.1 car_3.0-10 Rttf2pt1_1.3.8
## [37] maps_3.3.0 abind_1.4-5 DBI_1.1.1
## [40] rstatix_0.7.0 Rcpp_1.0.6 xtable_1.8-4
## [43] progress_1.2.2 foreign_0.8-81 bit_4.0.4
## [46] htmlwidgets_1.5.3 httr_1.4.2 ellipsis_0.3.1
## [49] farver_2.1.0 pkgconfig_2.0.3 XML_3.99-0.6
## [52] sass_0.3.1 dbplyr_2.1.1 locfit_1.5-9.4
## [55] utf8_1.2.1 labeling_0.4.2 tidyselect_1.1.0
## [58] rlang_0.4.10 munsell_0.5.0 cellranger_1.1.0
## [61] tools_4.0.5 cachem_1.0.4 cli_2.4.0
## [64] generics_0.1.0 RSQLite_2.2.6 broom_0.7.6
## [67] evaluate_0.14 fastmap_1.1.0 yaml_2.2.1
## [70] bit64_4.0.5 fs_1.5.0 zip_2.1.1
## [73] ash_1.0-15 ggrastr_0.2.3 xml2_1.3.2
## [76] compiler_4.0.5 rstudioapi_0.13 beeswarm_0.3.1
## [79] curl_4.3 ggsignif_0.6.1 reprex_2.0.0
## [82] geneplotter_1.68.0 bslib_0.2.4 stringi_1.5.3
## [85] highr_0.8 ggalt_0.4.0 lattice_0.20-41
## [88] Matrix_1.3-2 vctrs_0.3.7 pillar_1.6.0
## [91] lifecycle_1.0.0 jquerylib_0.1.3 data.table_1.14.0
## [94] bitops_1.0-6 R6_2.5.0 rio_0.5.26
## [97] KernSmooth_2.23-18 vipor_0.4.5 MASS_7.3-53.1
## [100] assertthat_0.2.1 rjson_0.2.20 openssl_1.4.3
## [103] rprojroot_2.0.2 withr_2.4.1 GenomicAlignments_1.26.0
## [106] Rsamtools_2.6.0 GenomeInfoDbData_1.2.4 hms_1.0.0
## [109] grid_4.0.5 rmarkdown_2.7 carData_3.0-4
## [112] ggpubr_0.4.0 lubridate_1.7.10 ggbeeswarm_0.6.0